library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load spatial data
require(sf) || install.packages("sf", dependencies = TRUE)
## Loading required package: sf
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
## [1] TRUE
library(sf)
require(mapview) || install.packages("mapview", dependencies = TRUE)
## Loading required package: mapview
## [1] TRUE
library(mapview)
mapviewOptions(fgb = FALSE)
require(leafsync) || install.packages("leafsync", dependencies = TRUE)
## Loading required package: leafsync
## [1] TRUE
library(leafsync)
require(maps) || install.packages("maps", dependencies = TRUE)
## Loading required package: maps
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:purrr':
##
## map
## [1] TRUE
library(maps)
PersonData <- read_rds('../data/PersonData_111A.Rds')
HHData <- read_rds('../data/HHData_111A.Rds')
hh_bgDensity <- read_rds('../data/hh_bgDensity.Rds')
personHHData <- left_join(PersonData, HHData) %>%
left_join(hh_bgDensity)
## Joining with `by = join_by(hhid)`
## Joining with `by = join_by(hhid)`
county_shp <- st_read("../data/counties/counties.shp")
## Reading layer `counties' from data source
## `/Users/Rebecca/PSTAT 197/vignette-householdclassification/data/counties/counties.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 58 features and 17 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -124.3926 ymin: 32.53578 xmax: -114.1312 ymax: 42.00952
## Geodetic CRS: WGS 84
plot(county_shp["NAME"])
The unique colors correspond to the unique names of each county.
names(personHHData)
## [1] "hhid" "pnum" "Male"
## [4] "Age" "persHisp" "persWhite"
## [7] "persAfricanAm" "persNativeAm" "persAsian"
## [10] "persPacIsl" "persOthr" "persDKrace"
## [13] "persRFrace" "bornUSA" "DriverLic"
## [16] "TransitPass" "Employed" "WorkFixedLoc"
## [19] "WorkHome" "WorkNonfixed" "WorkDaysWk"
## [22] "TypicalHoursWk" "FlexSched" "FlexPrograms"
## [25] "Disability" "DisLicensePlt" "TransitTripsWk"
## [28] "WalkTripsWk" "BikeTripsWk" "Student"
## [31] "WorkMode" "SchoolMode" "EducationCompl"
## [34] "workday" "Walk_Dist" "Bike_Dist"
## [37] "DriveAlone_Dist" "Driveothers_Dist" "Passenger_Dist"
## [40] "Plane_Dist" "Allothers_Dist" "Walk_trips"
## [43] "Bike_trips" "DriveAlone_trips" "Driveothers_trips"
## [46] "Passenger_trips" "Plane_trips" "Allothers_trips"
## [49] "Sum_Trips" "Sum_PMT" "CTFIP"
## [52] "County" "MPO" "City"
## [55] "DOW" "HH_size" "HH_nTrips"
## [58] "HH_nEmployees" "HH_nStudents" "HH_nLicenses"
## [61] "HH_nCars" "HH_nBikes" "HH_income"
## [64] "HH_anyTransitRider" "HH_homeType" "HH_homeowner"
## [67] "HH_isHispanic" "HH_intEnglish" "bg_density"
## [70] "bg_group"
Taking some of the CHTS person and household characteristics and aggregating them to the county level so we can map the traits of our CHTS respondents county-by-county.
Get the total count of how many people in our survey come from the various counties of California:
personHHData %>%
group_by(CTFIP, County) %>% # added County to this grouping so we can see the county names
summarise(count=n())
## `summarise()` has grouped output by 'CTFIP'. You can override using the
## `.groups` argument.
## # A tibble: 58 × 3
## # Groups: CTFIP [58]
## CTFIP County count
## <dbl> <chr> <int>
## 1 6001 Alameda 3531
## 2 6003 Alpine 44
## 3 6005 Amador 339
## 4 6007 Butte 774
## 5 6009 Calaveras 338
## 6 6011 Colusa 216
## 7 6013 Contra Costa 2951
## 8 6015 Del Norte 412
## 9 6017 El Dorado 863
## 10 6019 Fresno 2669
## # ℹ 48 more rows
We get the county-by-county means of all the variables of interest.
Taking the means instead of just getting sums accounts for the fact that
there are not equal amounts of people surveyed from each county. We also
create a new column count to this dataset so we can carry
over the counts of how many people were surveyed from each county.
prhh_aggreg <- personHHData %>%
group_by(County, CTFIP) %>%
mutate(count = n()) %>% # the new column called 'count'
summarise_at(vars(-hhid, -pnum, -bg_group), mean)
## Warning: There were 348 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `WorkMode = (function (x, ...) ...`.
## ℹ In group 1: `County = "Alameda"`, `CTFIP = 6001`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 347 remaining warnings.
county_prhh_shp <- county_shp %>% left_join(prhh_aggreg)
## Joining with `by = join_by(CTFIP)`
Newly-created county_prhh_shp is also an sf
object so it has spatial attributes and can be mapped.
mapview(county_prhh_shp, # the dataset to use
zcol = "bornUSA", # tells it which column to map
legend = TRUE, # if FALSE, it won't show the legend
label = as.character(county_prhh_shp$NAME), # tells it the column whose value you want to appear when you hover over a shape with your mouse
popup = leafpop::popupTable(x = county_prhh_shp, zcol = c("bornUSA", "count")) # determines what is included in the popup window when you click on a shape
)
Notice that the percentage of respondents who were born in the USA increases as you look at more northern counties. However, also notice that more urban counties have lower percentages, regardless of their position in the state.
4 maps for each residential location type
county_bg_aggreg <- personHHData %>%
group_by(County, CTFIP, bg_group) %>% # group by county, CTFIP, and also bg_group
mutate(count = n()) %>%
summarise_at(vars(-hhid, -pnum), mean)
## Warning: There were 930 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `WorkMode = (function (x, ...) ...`.
## ℹ In group 1: `County = "Alameda"`, `CTFIP = 6001`, `bg_group = Urban`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 929 remaining warnings.
county_bg_shp <- county_shp %>%
merge(data.frame(bg_group = c("Urban", "Suburban", "Exurban", "Rural"))) %>%
left_join(county_bg_aggreg)
## Joining with `by = join_by(CTFIP, bg_group)`
Sum Trips by Residential Area
urban_TripMap <- mapview(filter(county_bg_shp, bg_group == "Urban"),
zcol = "Sum_Trips", legend = TRUE, popup = NULL,
layer.name = "Urban Trips")
suburb_TripMap <- mapview(filter(county_bg_shp, bg_group == "Suburban"),
zcol = "Sum_Trips", legend = TRUE, popup = NULL,
layer.name = "Suburban Trips")
exurb_TripMap <- mapview(filter(county_bg_shp, bg_group == "Exurban"),
zcol = "Sum_Trips", legend = TRUE, popup = NULL,
layer.name = "Exurban Trips")
rural_TripMap <- mapview(filter(county_bg_shp, bg_group == "Rural"),
zcol = "Sum_Trips", legend = TRUE, popup = NULL,
layer.name = "Rural Trips")
latticeview(urban_TripMap, suburb_TripMap, exurb_TripMap, rural_TripMap, sync = "all")